Intro

# Read the Stanford MSA database CSV from the working directory.
msaData <- read_csv("Stanford_MSA_Database.csv")

head(msaData)

# Build a narrower copy of the data: drop the long free-text "Detailed"
# columns, the description/notes fields, the shooter name, the day of week,
# and every column whose name starts with "Data ".
msaDataTrunc <- dplyr::select(
  msaData,
  -Description,
  -`Day of Week`,
  -`Shooter Name`,
  -`Type of Gun - Detailed`,
  -`Targeted Victim/s - Detailed`,
  -`Possible Motive - Detailed`,
  -`History of Mental Illness - Detailed`,
  -starts_with("Data "),
  -Notes
)

head(msaDataTrunc)
library(rworldmap)
library(ggmap)

# Polygon outlines of the US states, one row per polygon vertex.
states <- map_data(map = "state")

# Note: mapping functions are very sensitive to variable names.
# In our dataset the state column is called "State", BUT the map data calls
# it "region", so rename ours to match before joining.
# NOTE: this overwrites msaDataTrunc, so the chunk can only be run once per
# session (a second run fails because "State" no longer exists).
msaDataTrunc <- rename(msaDataTrunc, region = State)

library(openintro)     ## students will need to install this library

# Lower-case the state names so they can match the map data's region values.
# NOTE(review): assumes State holds full state names rather than
# abbreviations -- confirm against the CSV.
msaDataTrunc$region <- tolower(msaDataTrunc$region)

# Join the map data with the shooting data: count the rows per state and
# attach that count (column `n`) to every vertex of the state's outline.
# The join key is spelled out so the join cannot silently pick up another
# shared column. States with no rows in the shooting data get n = NA.
states <- left_join(states, count(msaDataTrunc, region), by = "region")

head(states)
# Note: we need to determine the color on the map from the per-state count
# `n`. More shootings means a brighter color (orange); fewer means purple.
# States whose n is NA are drawn in the scale's NA color.
ggplot() +
  geom_map(
    data = states,
    map = states,
    aes(map_id = region, fill = n)
  ) +
  # geom_map() has no x/y aesthetics, so the plot extent is taken from the
  # polygon coordinates explicitly instead of passing x/y inside aes().
  expand_limits(x = states$long, y = states$lat) +
  scale_fill_gradient(low = "purple", high = "orange", guide = "colourbar") +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Choropleth Map of Mass Shootings"
  )

Heat Map

# Bounding box (degrees of longitude/latitude) for the basemap; these limits
# span roughly the contiguous United States.
us <- c(left = -126, bottom = 24.5, right = -66, top = 50)
# NOTE(review): newer ggmap releases replace get_stamenmap() with
# get_stadiamap(), which needs a Stadia Maps API key -- confirm which ggmap
# version the class has installed.
myMap <- get_stamenmap(us, zoom = 5, maptype = "toner-lite")

# First pass: contour lines of the 2-D density of incident locations, with
# each incident drawn as a semi-transparent point on top.
# No layer here maps `fill` or `alpha` to data, so the fill/alpha scales that
# the filled versions of this plot use are not needed.
# NOTE(review): the density is computed from incident locations only (no
# weighting by victims), so the "Fatalities" wording in the title may
# overstate what is shown -- confirm the intended title.
ggmap(myMap) +
  geom_density2d(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude),
    size = 0.3
  ) +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Heat Map of US Mass Shooting Fatalities"
  ) +
  geom_point(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude),
    alpha = .5,
    size = 1
  )

# Zoom in: a much smaller bounding box (longitude -119.5 to -116, latitude
# 33 to 34.5) fetched at a higher zoom level on the terrain basemap.
# NOTE(review): this box looks like the greater Los Angeles area while the
# plot title below still says "US" -- confirm the intended title.
us <- c(left = -119.5, bottom = 33, right = -116, top = 34.5)
myMap <- get_stamenmap(us, zoom = 10, maptype = "terrain")

# Open the help page to look up the available zoom levels and map types.
help("get_stamenmap")

# Contour lines plus filled density polygons. Both the fill color and the
# transparency of each polygon follow the density level, so denser regions
# are more orange and more opaque.
ggmap(myMap) +
  geom_density2d(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude),
    size = 0.3
  ) +
  stat_density2d(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude, fill = ..level.., alpha = ..level..),
    size = 0.01,
    bins = 20,
    geom = "polygon"
  ) +
  scale_fill_gradient(low = "blue", high = "orange") +
  # guide = "none" hides the alpha legend (guide = FALSE is deprecated).
  scale_alpha(range = c(0, 0.3), guide = "none") +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Heat Map of US Mass Shooting Fatalities"
  ) +
  geom_point(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude),
    alpha = .5,
    size = 1
  )

# Back to the wide bounding box, this time on the terrain basemap.
us <- c(left = -126, bottom = 24.5, right = -66, top = 50)
myMap <- get_stamenmap(us, zoom = 5, maptype = "terrain")

# Filled density map (10 density bins) with contour lines and incident
# points, followed by examples of annotate(): a rectangle, a text label and
# an arrow.
ggmap(myMap) +
  geom_density2d(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude),
    size = 0.3
  ) +
  stat_density2d(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude, fill = ..level.., alpha = ..level..),
    size = 0.01,
    bins = 10,
    geom = "polygon"
  ) +
  scale_fill_gradient(low = "blue", high = "orange") +
  # guide = "none" hides the alpha legend (guide = FALSE is deprecated).
  scale_alpha(range = c(0, 0.3), guide = "none") +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Heat Map of US Mass Shooting Fatalities"
  ) +
  geom_point(
    data = msaDataTrunc,
    aes(x = Longitude, y = Latitude),
    alpha = .5,
    size = 1
  ) +
  # Red outline box from longitude -99 to -97 and latitude 31 to 35.
  annotate(
    "rect",
    xmin = -99, xmax = -97, ymin = 31, ymax = 35,
    colour = "red", fill = "transparent"
  ) +
  annotate("text", x = -95, y = 40, label = "this is text", size = 3) +
  # One short horizontal segment with an arrowhead at its end point. An arrow
  # needs a segment with non-zero length to have a direction, so the
  # arrowhead is attached to the visible segment itself.
  # NOTE(review): confirm the intended arrow direction and length.
  annotate(
    "segment",
    x = -97, xend = -97.2, y = 31.55, yend = 31.55,
    arrow = arrow(length = unit(0.3, "cm")),
    size = 1
  )

Playing With ggmap

Let’s play with the garlic mustard data

# Read the garlic mustard data set from the working directory and preview it.
garlicData <- read_csv("GarlicMustardData.csv")

head(garlicData)

# Terrain basemap for the wide bounding box.
us <- c(left = -126, bottom = 24.5, right = -66, top = 50)
myMap <- get_stamenmap(us, zoom = 5, maptype = "terrain")

# Plot every garlic mustard record at its coordinates, colored by
# AvgAdultHeight, and draw an orange arrow from (-88, 35) to (-81, 39).
ggmap(myMap) +
  geom_point(
    data = garlicData,
    mapping = aes(x = Longitude, y = Latitude, color = AvgAdultHeight),
    alpha = .5,
    size = 1
  ) +
  annotate(
    "segment",
    x = -88,
    y = 35,
    xend = -81,
    yend = 39,
    color = "orange",
    arrow = arrow(length = unit(.3, "cm"))
  )

Regular Expressions

You already know how to search in Google, and you know how to filter a data frame. Example with iris: find all plants whose species is “setosa”.

# Keep only the rows of iris whose Species is exactly "setosa".
dplyr::filter(iris, Species == "setosa")

What if you want to do something more advanced?

Let’s practice with the babynames dataset.

# Load the babynames package, which provides the `babynames` data frame.
library(babynames)

# Preview the first six rows.
head(babynames, n = 6L)

Example: what if we wanted to find all names with three vowels in a row?

There’s no single specific string we could search for, because there are lots of possibilities:

aaa

eee

aou

….

Regular expressions are a pattern matching language. Super cool! They give you a whole language to describe what patterns you’re looking for in data.

Useful on your final project especially: what if you’re looking for specific types of data?

If we want to search for vowels, use this regexp:

[aeiou]

This matches any single vowel. To find three in a row, repeat it three times (the shorthand [aeiou]{3} means the same thing):

[aeiou][aeiou][aeiou]

Let’s try. First, wrangling:

head(babynames)

# Total count per (name, sex) pair, summed over every row of babynames for
# that pair, sorted from most to least common.
# summarise() only removes the last grouping variable, so without ungroup()
# the result would stay grouped by name and carry that grouping into every
# later filter().
# NOTE: `names` is also the name of a base R function; the variable name is
# kept because the code below refers to it.
names <- babynames %>%
  group_by(name, sex) %>%
  summarise(total = sum(n)) %>%
  ungroup() %>%
  arrange(desc(total))

head(names)

# Exact match: rows for the name "Ali".
names %>% filter(name == "Ali")

# Pattern match: names containing three vowels in a row, ignoring case.
names %>% filter(grepl("[aeiou]{3}", name, ignore.case = TRUE))

Q: what if we want to find names with three consonants in a row? I.e., three non-vowels?

# Names containing three non-vowels in a row, ignoring case.
# [^...] matches any character NOT in the set; "y" is listed with the vowels
# here, so it does not count as a consonant. {3} asks for exactly three such
# characters back to back, matching the question above.
names %>% filter(grepl("[^aeiouy]{3}", name, ignore.case = TRUE))